{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f8ec7e2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os, time, datetime\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy.optimize as opt\n",
    "import matplotlib.pyplot as plt\n",
    "import multiprocessing as mp\n",
    "\n",
    "os.chdir(\"/Users/xiaosongw/Dropbox/Research/InformedSources/Replication/Analysis\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6c8f41e",
   "metadata": {},
   "source": [
    "# Price "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a183dba7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df_p = pd.read_csv(\"../Build/Output/p_in.csv\")\n",
    "df_p['t'] = pd.to_datetime(df_p['t'])\n",
    "df_p['p'] = (df_p['pf'] / 100) * 35\n",
    "df_p = df_p[(df_p['t']>='2015-05-01')&(df_p['t']<='2017-12-31')]\n",
    "print(df_p['p'].mean())\n",
    "df_p.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20929f81",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "l_dates = df_p['t'].unique()\n",
    "print(l_dates[0], l_dates[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d2f4410",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df_in = df_p[['t', 'id', 'p', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6']].rename(columns={'sitecode':'id'}).copy()\n",
    "df_in.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ba8103f",
   "metadata": {},
   "source": [
    "# Routes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "110edad8-0ff4-4056-a19d-3547e0f11fe6",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_n = pd.read_csv(\"../Build/Output/od_long_mel.csv\").rename(\n",
    "    columns={'tzn0':'tz0', 'tzn1':'tz1'})\n",
    "df_n.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71696d90-1929-4b1f-b96d-c73cc58e2369",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_tz2tz = pd.read_csv(\"../Build/Output/tz2tz_out.csv\").rename(\n",
    "    columns={'tzn0':'tz0', 'tzn1':'tz1', 'distance':'dist', 'duration':'dur'})\n",
    "df_tz2tz.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d3f650d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "df_r = pd.read_csv(\"../Build/Output/tz2st2tz_out.csv\")\n",
    "df_r['r'] = df_r.groupby(['tz0', 'tz1']).ngroup()\n",
    "df_r['dur'] = round(df_r['t'], 2)\n",
    "df_r = df_r.merge(df_n, on=['tz0', 'tz1'], how='left')\n",
    "df_r = df_r.merge(df_tz2tz[['tz0', 'tz1', 'dist']], on=['tz0', 'tz1'], how='left')\n",
    "df_r.head(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "526e06e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df_r_in = df_r[['r', 'id', 'n', 'dur', 'dist']].copy()\n",
    "tmp_r = df_r_in[['r', 'n', 'dist']].drop_duplicates().copy()\n",
    "tmp_r['n_w'] = tmp_r['n'] * tmp_r['dist']\n",
    "tmp_r['f'] = tmp_r['n_w'] / tmp_r['n_w'].sum() * 1e3\n",
    "df_r_in['f'] = df_r_in['r'].map(tmp_r.set_index('r')['f'].to_dict())\n",
    "df_r_in.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50045c63",
   "metadata": {},
   "source": [
    "# market share"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "121338d5",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "data_bshr = pd.DataFrame({'year':[2015]*5 + [2017]*5, \n",
    "                          'shr': [0.1167, 0.15, 0.4444, 0.1833, 0.0389]+\n",
    "                     [0.1333, 0.1455, 0.297, 0.2121, 0.0788]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b01e695",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "l_d_pre = pd.date_range('2015-05-01', '2015-08-01')[0:-1]\n",
    "l_d_pos = pd.date_range('2017-05-01', '2017-08-01')[0:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41526cf8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "l_d_in = np.concatenate([l_d_pre, l_d_pos], axis=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e111200",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df_in['post'] = np.nan\n",
    "df_in.loc[df_in['t'].isin(l_d_pre), 'post'] = 0\n",
    "df_in.loc[df_in['t'].isin(l_d_pos), 'post'] = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8f2c94f",
   "metadata": {},
   "source": [
    "# estimation\n",
    "\n",
    "cost of time 0.43 AUD or 43 cents per minute\n",
    "\n",
    "assume 35 litres of gas\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c3e49e8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "par_init = np.array([-1.31,  2.56,  2.71,  4.14,  2.9, 0.9])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e39fd148",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def model_mks(pars, df_r, df_in_t):\n",
    "    a, b = pars[0], pars[1:]\n",
    "    theta = np.concatenate([pars, np.array([0, 0.43*a])])\n",
    "    df_in_t = df_in_t.set_index('id')\n",
    "    df_ = df_r.merge(df_in_t, left_on='id', right_index=True, how='inner')\n",
    "    df_['u'] = df_[['p', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'dur']].dot(theta)\n",
    "    df_['expu'] = np.exp(df_['u'])\n",
    "    df_['deno'] = df_.groupby(['r'])['expu'].transform('sum')\n",
    "    df_['shr'] = df_['expu'] / df_['deno']\n",
    "    df_['shr_w'] = df_['shr'] * df_['f']\n",
    "    df_['elas'] = a * df_['p'] * (1 - df_['shr'])\n",
    "    df_['elas_w'] = df_['elas'] * df_['shr_w']\n",
    "    \n",
    "    df_['c_da'] = df_['shr'] * (df_['p'] + 0.43 * df_['dur'])\n",
    "    df_['dc_da'] = df_['c_da'] - df_['shr'] * (df_.groupby('r')['c_da'].transform('sum'))\n",
    "    df_['ds_da'] = df_['dc_da'] * df_['f']\n",
    "    \n",
    "    for ib in ['b1', 'b2', 'b3', 'b4', 'b5']:\n",
    "        df_['c_d'+ib] = df_['shr'] * df_[ib]\n",
    "        df_['dc_d'+ib] = (df_['c_d'+ib] - df_['shr'] * (df_.groupby('r')['c_d'+ib].transform('sum')))\n",
    "        df_['ds_d'+ib] = df_['dc_d'+ib] * df_['f']\n",
    "        \n",
    "    df_['delas_da'] = df_['p'] * (1 - df_['shr']) - a * df_['p'] * df_['dc_da']\n",
    "    df_['de_da1'] = (df_['dc_da'] * df_['elas'] + df_['shr'] * df_['delas_da']) * df_['f']\n",
    "    \n",
    "    for ib in ['b1', 'b2', 'b3', 'b4', 'b5']:\n",
    "        df_['delas_d'+ib] = - a * df_['p'] * df_['dc_d'+ib]\n",
    "        df_['de_d1'+ib] = (df_['dc_d'+ib] * df_['elas'] + df_['shr'] * df_['delas_d'+ib]) * df_['f']\n",
    "        \n",
    "    df_out_ = df_.groupby('id').agg({'t':'first', 'p':'first', 'post':'first', 'n':'sum', 'shr_w':'sum', 'elas_w':'sum', \n",
    "        'ds_da':'sum', 'ds_db1':'sum', 'ds_db2':'sum', 'ds_db3':'sum', 'ds_db4':'sum', 'ds_db5':'sum', \n",
    "        'de_da1':'sum', 'de_d1b1':'sum', 'de_d1b2':'sum', 'de_d1b3':'sum', 'de_d1b4':'sum', 'de_d1b5':'sum'}\n",
    "                                   ).reset_index()\n",
    "\n",
    "    df_out_['elas'] = df_out_['elas_w'] / df_out_['shr_w']\n",
    "    df_out_['de_da'] = df_out_['de_da1'] / df_out_['shr_w'] - df_out_['ds_da'] * df_out_['elas'] / df_out_['shr_w']\n",
    "    for ib in ['b1', 'b2', 'b3', 'b4', 'b5']:\n",
    "        df_out_['de_d'+ib] = (df_out_['de_d1'+ib] / df_out_['shr_w'] \n",
    "                              - df_out_['ds_d'+ib] * df_out_['elas'] / df_out_['shr_w'])\n",
    "        \n",
    "    return df_out_[['id', 't', 'post', 'p', 'n', 'shr_w', 'elas_w', 'ds_da', 'ds_db1', 'ds_db2',\n",
    "       'ds_db3', 'ds_db4', 'ds_db5', 'elas', 'de_da', 'de_db1', 'de_db2', 'de_db3',\n",
    "       'de_db4', 'de_db5']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84fa2a1b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "df_pred0 = model_mks(par_init, df_r_in, df_in[(df_in['t']=='2016-08-01')])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4a7842c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def model_moments(df_pred):\n",
    "    df_pred['bid'] = df_pred['id'].map(d_id_bn)\n",
    "    df_pred['year'] = df_pred['t'].dt.year\n",
    "    df_pred['month'] = df_pred['t'].dt.month\n",
    "    # 2015-05-01 - 2015-08-1\n",
    "    # 2017-05-01 - 2017-08-1\n",
    "    df_mks_y = df_pred.groupby(['post', 'bid']).agg({'shr_w':'sum', \n",
    "        'ds_da':'sum', 'ds_db1':'sum', 'ds_db2':'sum', 'ds_db3':'sum', 'ds_db4':'sum', 'ds_db5':'sum'}).reset_index()\n",
    "    df_mks_y['mks'] = df_mks_y['shr_w'] / df_mks_y.groupby('post')['shr_w'].transform('sum')\n",
    "    for ib in ['a', 'b1', 'b2', 'b3', 'b4', 'b5']:\n",
    "        df_mks_y['dmks_d'+ib] = ((df_mks_y['ds_d'+ib] \n",
    "                                  - df_mks_y['mks'] * df_mks_y.groupby('post')['ds_d'+ib].transform('sum')) \n",
    "                               / df_mks_y.groupby('post')['shr_w'].transform('sum'))\n",
    "    l_y = df_mks_y['post'].unique().tolist()    \n",
    "    l_mom = []\n",
    "    for iy in l_y:\n",
    "        l_mom.append(df_mks_y[(df_mks_y['post']==iy)].set_index('bid').loc[\n",
    "        ['BP', 'Caltex', 'Coles', 'Woolworths', '7-Eleven'], 'mks'].values)\n",
    "    mom_model = np.concatenate(l_mom).reshape(-1, 1)\n",
    "    \n",
    "    l_der_vec = []\n",
    "    for ib in ['a', 'b1', 'b2', 'b3', 'b4', 'b5']:\n",
    "        l_der_y = []\n",
    "        for iy in l_y:\n",
    "            l_der_y.append(df_mks_y[df_mks_y['post']==iy].set_index('bid').loc[\n",
    "                ['BP', 'Caltex', 'Coles', 'Woolworths', '7-Eleven'], 'dmks_d'+ib].values)\n",
    "        tmp = np.concatenate(l_der_y).reshape(-1, 1)\n",
    "        l_der_vec.append(tmp)\n",
    "    der_model = np.concatenate(l_der_vec, axis=1)\n",
    "    return mom_model, der_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "865b97f4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def data_moments(data_bshr):\n",
    "    mom_data = np.concatenate([data_bshr['shr'].values]).reshape(-1, 1)\n",
    "    return mom_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93071a0d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def get_err_vec(mom_model, mom_data):\n",
    "    err_vec = (mom_model - mom_data) / mom_data\n",
    "    return err_vec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed1d47b0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def gradient(der_model, err_vec, mom_data):\n",
    "    grad = np.sum(2 / mom_data * err_vec * der_model, axis=0).tolist()\n",
    "    return grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67f59420",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def obj_func_pool(pars, df_r, df_p, l_dates, data_bshr, W, pool):\n",
    "    args = [(pars, df_r, df_p[df_p['t']==d]) for d in l_dates]\n",
    "    df_pred = pd.concat(pool.starmap(model_mks, args), axis=0)\n",
    "    mom_model, der_model = model_moments(df_pred)\n",
    "    mom_data = data_moments(data_bshr)\n",
    "    err_vec = get_err_vec(mom_model, mom_data)\n",
    "    crit_val = np.dot(np.dot(err_vec.T, W), err_vec)\n",
    "    grad = gradient(der_model, err_vec, mom_data)\n",
    "    return crit_val.flatten(), grad"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60081944",
   "metadata": {},
   "source": [
    "# run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5982394",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "logname = \"./Temp/log.txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bad4c798",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "Nfeval = 1\n",
    "start_time = time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51dfa0f4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "with open(logname, \"a\") as f:\n",
    "    f.write('\\n\\n------------\\n'+time.strftime(\"%Y%m%d %H:%M\", time.localtime())+'\\n')\n",
    "    f.write('pre: from 2015-05-01 to 2015-08-01\\n')\n",
    "    f.write('post: from 2017-05-01 to 2017-08-01\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa87bc1c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def callbackF(Xi):\n",
    "    global Nfeval\n",
    "    global start_time\n",
    "    str_out = '{0:4d}   {1: 3.2f}   {2: 3.2f}   {3: 3.2f}   {4: 3.2f}  {5:}\\n'.format(\n",
    "        Nfeval, Xi[0], Xi[1], Xi[2], Xi[3], time.strftime(\"%H:%M:%S\", time.gmtime((time.time() - start_time))))\n",
    "    print(str_out)\n",
    "    with open(logname, \"a\") as f:\n",
    "        f.write(str_out)\n",
    "    Nfeval += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fa7c5ee",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "l_months_in = []\n",
    "for i in l_d_in:\n",
    "    j = pd.to_datetime(i).replace(day=1)\n",
    "    if j not in l_months_in:\n",
    "        l_months_in.append(j)\n",
    "n_months = len(l_months_in)\n",
    "print(n_months)\n",
    "print(l_d_in[0], '\\n', l_d_in[-1])\n",
    "print(len(l_d_in))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6c2fe28",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "with open(logname, \"a\") as f:\n",
    "    f.write('initial parameters: '+str(par_init)+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5d06870",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "with mp.Pool(8) as pool:\n",
    "    results = opt.minimize(obj_func_pool, par_init, \n",
    "        args=(df_r_in, df_in.loc[(df_in['t'].isin(l_d_in))], l_d_in, data_bshr, np.eye(10), pool), \n",
    "        method='L-BFGS-B', jac=True, callback=callbackF, options={'disp': True, 'maxiter':1e5})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90c7e4de",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(logname, \"a\") as txt_file:\n",
    "    txt_file.write(\"parameters: {:.6f}, {:.6f}, {:.6f}, {:.6f}, {:.6f}, {:.6f}\\n\".format(\n",
    "        results.x[0], results.x[1], results.x[2], results.x[3], results.x[4], results.x[5]))\n",
    "    txt_file.write(str(results))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40cbada6-f418-40bc-ae54-882c5ef5562a",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = results.x\n",
    "df_par = pd.DataFrame({'par':x}, index=['p', 'BP', 'Caltex', 'Coles', 'Woolworths', '7-Eleven'])\n",
    "df_par.to_csv(\"./Output/pars.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "112e0600",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "args = [(x, df_r_in, df_in[df_in['t']==d]) for d in l_dates]\n",
    "with mp.Pool(8) as pool:\n",
    "    df_pred = pd.concat(pool.starmap(model_mks, args), axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5640906c",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_pred.to_csv(\"./Output/calibration_mks_pre_post.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdfc8eda-0ef2-4713-8830-163fd5f68c9f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
